; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 \
; RUN:   | FileCheck %s --check-prefixes=RV32,RV32ALIGNED
; RUN: llc < %s -mtriple=riscv64 \
; RUN:   | FileCheck %s --check-prefixes=RV64,RV64ALIGNED
; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV32,RV32UNALIGNED
; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV64,RV64UNALIGNED
; Aggregate of eleven i8 fields; the type of the external globals @src and @dst.
%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }

; External source and destination objects copied between in @t0.
@src = external dso_local global %struct.x
@dst = external dso_local global %struct.x

; Constant source strings for @t1 through @t6. Each is declared align 1 and
; its array length includes the trailing NUL byte.
@.str1 = private unnamed_addr constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 1
@.str2 = private unnamed_addr constant [36 x i8] c"DHRYSTONE PROGRAM, SOME STRING BLAH\00", align 1
@.str3 = private unnamed_addr constant [24 x i8] c"DHRYSTONE PROGRAM, SOME\00", align 1
@.str4 = private unnamed_addr constant [18 x i8] c"DHRYSTONE PROGR  \00", align 1
@.str5 = private unnamed_addr constant [7 x i8] c"DHRYST\00", align 1
@.str6 = private unnamed_addr constant [14 x i8] c"/tmp/rmXXXXXX\00", align 1
; Zero-initialized 512-byte internal buffer, align 16; the destination of the
; copy in @t6.
@spool.splbuf = internal global [512 x i8] zeroinitializer, align 16

; t0: copy 11 bytes from @src to @dst through the i32-length memcpy intrinsic,
; with both pointers marked align 8, then return 0.
; The expected code is shared between the configurations with and without
; +unaligned-scalar-mem and contains no call to memcpy:
;   - riscv32 copies words at offsets 0 and 4, a halfword at 8 and a byte at 10;
;   - riscv64 copies a doubleword at offset 0, a halfword at 8 and a byte at 10.
define i32 @t0() {
; RV32-LABEL: t0:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lui a0, %hi(src)
; RV32-NEXT:    lw a1, %lo(src)(a0)
; RV32-NEXT:    lui a2, %hi(dst)
; RV32-NEXT:    sw a1, %lo(dst)(a2)
; RV32-NEXT:    addi a0, a0, %lo(src)
; RV32-NEXT:    lbu a1, 10(a0)
; RV32-NEXT:    lh a3, 8(a0)
; RV32-NEXT:    lw a0, 4(a0)
; RV32-NEXT:    addi a2, a2, %lo(dst)
; RV32-NEXT:    sb a1, 10(a2)
; RV32-NEXT:    sh a3, 8(a2)
; RV32-NEXT:    sw a0, 4(a2)
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: t0:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lui a0, %hi(src)
; RV64-NEXT:    ld a1, %lo(src)(a0)
; RV64-NEXT:    lui a2, %hi(dst)
; RV64-NEXT:    addi a0, a0, %lo(src)
; RV64-NEXT:    lbu a3, 10(a0)
; RV64-NEXT:    lh a0, 8(a0)
; RV64-NEXT:    sd a1, %lo(dst)(a2)
; RV64-NEXT:    addi a1, a2, %lo(dst)
; RV64-NEXT:    sb a3, 10(a1)
; RV64-NEXT:    sh a0, 8(a1)
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
entry:
  call void @llvm.memcpy.p0.p0.i32(ptr align 8 @dst, ptr align 8 @src, i32 11, i1 false)
  ret i32 0
}

; t1: copy the 31-byte constant @.str1 to %C. Neither pointer operand of the
; memcpy carries an explicit alignment.
; Expected lowering:
;   - riscv32 (both configurations) and riscv64 without +unaligned-scalar-mem
;     tail-call the memcpy library routine with length 31;
;   - riscv64 with +unaligned-scalar-mem expands inline: doubleword load/store
;     pairs for offsets 0, 8 and 16, then materialized constants stored as a
;     word at 24 and a halfword at 28, and a zero byte stored at 30.
define void @t1(ptr nocapture %C) nounwind {
; RV32-LABEL: t1:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lui a1, %hi(.L.str1)
; RV32-NEXT:    addi a1, a1, %lo(.L.str1)
; RV32-NEXT:    li a2, 31
; RV32-NEXT:    tail memcpy@plt
;
; RV64ALIGNED-LABEL: t1:
; RV64ALIGNED:       # %bb.0: # %entry
; RV64ALIGNED-NEXT:    lui a1, %hi(.L.str1)
; RV64ALIGNED-NEXT:    addi a1, a1, %lo(.L.str1)
; RV64ALIGNED-NEXT:    li a2, 31
; RV64ALIGNED-NEXT:    tail memcpy@plt
;
; RV64UNALIGNED-LABEL: t1:
; RV64UNALIGNED:       # %bb.0: # %entry
; RV64UNALIGNED-NEXT:    lui a1, %hi(.L.str1)
; RV64UNALIGNED-NEXT:    ld a2, %lo(.L.str1)(a1)
; RV64UNALIGNED-NEXT:    sd a2, 0(a0)
; RV64UNALIGNED-NEXT:    lui a2, 4
; RV64UNALIGNED-NEXT:    addiw a2, a2, 1870
; RV64UNALIGNED-NEXT:    sh a2, 28(a0)
; RV64UNALIGNED-NEXT:    lui a2, 300325
; RV64UNALIGNED-NEXT:    addiw a2, a2, 1107
; RV64UNALIGNED-NEXT:    addi a1, a1, %lo(.L.str1)
; RV64UNALIGNED-NEXT:    ld a3, 16(a1)
; RV64UNALIGNED-NEXT:    ld a1, 8(a1)
; RV64UNALIGNED-NEXT:    sw a2, 24(a0)
; RV64UNALIGNED-NEXT:    sb zero, 30(a0)
; RV64UNALIGNED-NEXT:    sd a3, 16(a0)
; RV64UNALIGNED-NEXT:    sd a1, 8(a0)
; RV64UNALIGNED-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str1, i64 31, i1 false)
  ret void
}

; t2: copy the 36-byte constant @.str2 to %C. Neither pointer operand of the
; memcpy carries an explicit alignment.
; Expected lowering:
;   - riscv32 (both configurations) and riscv64 without +unaligned-scalar-mem
;     tail-call the memcpy library routine with length 36;
;   - riscv64 with +unaligned-scalar-mem expands inline: doubleword load/store
;     pairs for offsets 0, 8, 16 and 24, plus a materialized constant stored as
;     a word at offset 32.
define void @t2(ptr nocapture %C) nounwind {
; RV32-LABEL: t2:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lui a1, %hi(.L.str2)
; RV32-NEXT:    addi a1, a1, %lo(.L.str2)
; RV32-NEXT:    li a2, 36
; RV32-NEXT:    tail memcpy@plt
;
; RV64ALIGNED-LABEL: t2:
; RV64ALIGNED:       # %bb.0: # %entry
; RV64ALIGNED-NEXT:    lui a1, %hi(.L.str2)
; RV64ALIGNED-NEXT:    addi a1, a1, %lo(.L.str2)
; RV64ALIGNED-NEXT:    li a2, 36
; RV64ALIGNED-NEXT:    tail memcpy@plt
;
; RV64UNALIGNED-LABEL: t2:
; RV64UNALIGNED:       # %bb.0: # %entry
; RV64UNALIGNED-NEXT:    lui a1, %hi(.L.str2)
; RV64UNALIGNED-NEXT:    ld a2, %lo(.L.str2)(a1)
; RV64UNALIGNED-NEXT:    sd a2, 0(a0)
; RV64UNALIGNED-NEXT:    lui a2, 1156
; RV64UNALIGNED-NEXT:    addiw a2, a2, 332
; RV64UNALIGNED-NEXT:    addi a1, a1, %lo(.L.str2)
; RV64UNALIGNED-NEXT:    ld a3, 24(a1)
; RV64UNALIGNED-NEXT:    ld a4, 16(a1)
; RV64UNALIGNED-NEXT:    ld a1, 8(a1)
; RV64UNALIGNED-NEXT:    sw a2, 32(a0)
; RV64UNALIGNED-NEXT:    sd a3, 24(a0)
; RV64UNALIGNED-NEXT:    sd a4, 16(a0)
; RV64UNALIGNED-NEXT:    sd a1, 8(a0)
; RV64UNALIGNED-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str2, i64 36, i1 false)
  ret void
}

; t3: copy the 24-byte constant @.str3 to %C. Neither pointer operand of the
; memcpy carries an explicit alignment.
; Expected lowering:
;   - without +unaligned-scalar-mem, both targets tail-call the memcpy library
;     routine with length 24;
;   - riscv32 with +unaligned-scalar-mem stores six materialized word constants
;     (offsets 20 down to 0) and never loads from the string;
;   - riscv64 with +unaligned-scalar-mem uses doubleword load/store pairs for
;     offsets 0, 8 and 16.
define void @t3(ptr nocapture %C) nounwind {
; RV32ALIGNED-LABEL: t3:
; RV32ALIGNED:       # %bb.0: # %entry
; RV32ALIGNED-NEXT:    lui a1, %hi(.L.str3)
; RV32ALIGNED-NEXT:    addi a1, a1, %lo(.L.str3)
; RV32ALIGNED-NEXT:    li a2, 24
; RV32ALIGNED-NEXT:    tail memcpy@plt
;
; RV64ALIGNED-LABEL: t3:
; RV64ALIGNED:       # %bb.0: # %entry
; RV64ALIGNED-NEXT:    lui a1, %hi(.L.str3)
; RV64ALIGNED-NEXT:    addi a1, a1, %lo(.L.str3)
; RV64ALIGNED-NEXT:    li a2, 24
; RV64ALIGNED-NEXT:    tail memcpy@plt
;
; RV32UNALIGNED-LABEL: t3:
; RV32UNALIGNED:       # %bb.0: # %entry
; RV32UNALIGNED-NEXT:    lui a1, 1109
; RV32UNALIGNED-NEXT:    addi a1, a1, -689
; RV32UNALIGNED-NEXT:    sw a1, 20(a0)
; RV32UNALIGNED-NEXT:    lui a1, 340483
; RV32UNALIGNED-NEXT:    addi a1, a1, -947
; RV32UNALIGNED-NEXT:    sw a1, 16(a0)
; RV32UNALIGNED-NEXT:    lui a1, 267556
; RV32UNALIGNED-NEXT:    addi a1, a1, 1871
; RV32UNALIGNED-NEXT:    sw a1, 12(a0)
; RV32UNALIGNED-NEXT:    lui a1, 337154
; RV32UNALIGNED-NEXT:    addi a1, a1, 69
; RV32UNALIGNED-NEXT:    sw a1, 8(a0)
; RV32UNALIGNED-NEXT:    lui a1, 320757
; RV32UNALIGNED-NEXT:    addi a1, a1, 1107
; RV32UNALIGNED-NEXT:    sw a1, 4(a0)
; RV32UNALIGNED-NEXT:    lui a1, 365861
; RV32UNALIGNED-NEXT:    addi a1, a1, -1980
; RV32UNALIGNED-NEXT:    sw a1, 0(a0)
; RV32UNALIGNED-NEXT:    ret
;
; RV64UNALIGNED-LABEL: t3:
; RV64UNALIGNED:       # %bb.0: # %entry
; RV64UNALIGNED-NEXT:    lui a1, %hi(.L.str3)
; RV64UNALIGNED-NEXT:    ld a2, %lo(.L.str3)(a1)
; RV64UNALIGNED-NEXT:    addi a1, a1, %lo(.L.str3)
; RV64UNALIGNED-NEXT:    ld a3, 16(a1)
; RV64UNALIGNED-NEXT:    ld a1, 8(a1)
; RV64UNALIGNED-NEXT:    sd a2, 0(a0)
; RV64UNALIGNED-NEXT:    sd a3, 16(a0)
; RV64UNALIGNED-NEXT:    sd a1, 8(a0)
; RV64UNALIGNED-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str3, i64 24, i1 false)
  ret void
}

; t4: copy the 18-byte constant @.str4 to %C. Neither pointer operand of the
; memcpy carries an explicit alignment.
; Expected lowering:
;   - without +unaligned-scalar-mem, both targets tail-call the memcpy library
;     routine with length 18;
;   - riscv32 with +unaligned-scalar-mem stores the halfword value 32 at offset
;     16 and four materialized word constants at offsets 12 down to 0;
;   - riscv64 with +unaligned-scalar-mem uses doubleword load/store pairs for
;     offsets 0 and 8 and stores the halfword value 32 at offset 16.
define void @t4(ptr nocapture %C) nounwind {
; RV32ALIGNED-LABEL: t4:
; RV32ALIGNED:       # %bb.0: # %entry
; RV32ALIGNED-NEXT:    lui a1, %hi(.L.str4)
; RV32ALIGNED-NEXT:    addi a1, a1, %lo(.L.str4)
; RV32ALIGNED-NEXT:    li a2, 18
; RV32ALIGNED-NEXT:    tail memcpy@plt
;
; RV64ALIGNED-LABEL: t4:
; RV64ALIGNED:       # %bb.0: # %entry
; RV64ALIGNED-NEXT:    lui a1, %hi(.L.str4)
; RV64ALIGNED-NEXT:    addi a1, a1, %lo(.L.str4)
; RV64ALIGNED-NEXT:    li a2, 18
; RV64ALIGNED-NEXT:    tail memcpy@plt
;
; RV32UNALIGNED-LABEL: t4:
; RV32UNALIGNED:       # %bb.0: # %entry
; RV32UNALIGNED-NEXT:    li a1, 32
; RV32UNALIGNED-NEXT:    sh a1, 16(a0)
; RV32UNALIGNED-NEXT:    lui a1, 132388
; RV32UNALIGNED-NEXT:    addi a1, a1, 1871
; RV32UNALIGNED-NEXT:    sw a1, 12(a0)
; RV32UNALIGNED-NEXT:    lui a1, 337154
; RV32UNALIGNED-NEXT:    addi a1, a1, 69
; RV32UNALIGNED-NEXT:    sw a1, 8(a0)
; RV32UNALIGNED-NEXT:    lui a1, 320757
; RV32UNALIGNED-NEXT:    addi a1, a1, 1107
; RV32UNALIGNED-NEXT:    sw a1, 4(a0)
; RV32UNALIGNED-NEXT:    lui a1, 365861
; RV32UNALIGNED-NEXT:    addi a1, a1, -1980
; RV32UNALIGNED-NEXT:    sw a1, 0(a0)
; RV32UNALIGNED-NEXT:    ret
;
; RV64UNALIGNED-LABEL: t4:
; RV64UNALIGNED:       # %bb.0: # %entry
; RV64UNALIGNED-NEXT:    lui a1, %hi(.L.str4)
; RV64UNALIGNED-NEXT:    ld a2, %lo(.L.str4)(a1)
; RV64UNALIGNED-NEXT:    addi a1, a1, %lo(.L.str4)
; RV64UNALIGNED-NEXT:    ld a1, 8(a1)
; RV64UNALIGNED-NEXT:    li a3, 32
; RV64UNALIGNED-NEXT:    sh a3, 16(a0)
; RV64UNALIGNED-NEXT:    sd a2, 0(a0)
; RV64UNALIGNED-NEXT:    sd a1, 8(a0)
; RV64UNALIGNED-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str4, i64 18, i1 false)
  ret void
}

; t5: copy the 7-byte constant @.str5 to %C. Neither pointer operand of the
; memcpy carries an explicit alignment.
; All four configurations expand the copy inline with no call to memcpy:
;   - without +unaligned-scalar-mem, both targets emit seven byte stores: the
;     zero register at offset 6 and immediates 84, 83, 89, 82, 72, 68 at
;     offsets 5 down to 0;
;   - with +unaligned-scalar-mem, both targets store a zero byte at offset 6, a
;     materialized halfword at offset 4 and a materialized word at offset 0.
define void @t5(ptr nocapture %C) nounwind {
; RV32ALIGNED-LABEL: t5:
; RV32ALIGNED:       # %bb.0: # %entry
; RV32ALIGNED-NEXT:    sb zero, 6(a0)
; RV32ALIGNED-NEXT:    li a1, 84
; RV32ALIGNED-NEXT:    sb a1, 5(a0)
; RV32ALIGNED-NEXT:    li a1, 83
; RV32ALIGNED-NEXT:    sb a1, 4(a0)
; RV32ALIGNED-NEXT:    li a1, 89
; RV32ALIGNED-NEXT:    sb a1, 3(a0)
; RV32ALIGNED-NEXT:    li a1, 82
; RV32ALIGNED-NEXT:    sb a1, 2(a0)
; RV32ALIGNED-NEXT:    li a1, 72
; RV32ALIGNED-NEXT:    sb a1, 1(a0)
; RV32ALIGNED-NEXT:    li a1, 68
; RV32ALIGNED-NEXT:    sb a1, 0(a0)
; RV32ALIGNED-NEXT:    ret
;
; RV64ALIGNED-LABEL: t5:
; RV64ALIGNED:       # %bb.0: # %entry
; RV64ALIGNED-NEXT:    sb zero, 6(a0)
; RV64ALIGNED-NEXT:    li a1, 84
; RV64ALIGNED-NEXT:    sb a1, 5(a0)
; RV64ALIGNED-NEXT:    li a1, 83
; RV64ALIGNED-NEXT:    sb a1, 4(a0)
; RV64ALIGNED-NEXT:    li a1, 89
; RV64ALIGNED-NEXT:    sb a1, 3(a0)
; RV64ALIGNED-NEXT:    li a1, 82
; RV64ALIGNED-NEXT:    sb a1, 2(a0)
; RV64ALIGNED-NEXT:    li a1, 72
; RV64ALIGNED-NEXT:    sb a1, 1(a0)
; RV64ALIGNED-NEXT:    li a1, 68
; RV64ALIGNED-NEXT:    sb a1, 0(a0)
; RV64ALIGNED-NEXT:    ret
;
; RV32UNALIGNED-LABEL: t5:
; RV32UNALIGNED:       # %bb.0: # %entry
; RV32UNALIGNED-NEXT:    sb zero, 6(a0)
; RV32UNALIGNED-NEXT:    lui a1, 5
; RV32UNALIGNED-NEXT:    addi a1, a1, 1107
; RV32UNALIGNED-NEXT:    sh a1, 4(a0)
; RV32UNALIGNED-NEXT:    lui a1, 365861
; RV32UNALIGNED-NEXT:    addi a1, a1, -1980
; RV32UNALIGNED-NEXT:    sw a1, 0(a0)
; RV32UNALIGNED-NEXT:    ret
;
; RV64UNALIGNED-LABEL: t5:
; RV64UNALIGNED:       # %bb.0: # %entry
; RV64UNALIGNED-NEXT:    sb zero, 6(a0)
; RV64UNALIGNED-NEXT:    lui a1, 5
; RV64UNALIGNED-NEXT:    addiw a1, a1, 1107
; RV64UNALIGNED-NEXT:    sh a1, 4(a0)
; RV64UNALIGNED-NEXT:    lui a1, 365861
; RV64UNALIGNED-NEXT:    addiw a1, a1, -1980
; RV64UNALIGNED-NEXT:    sw a1, 0(a0)
; RV64UNALIGNED-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str5, i64 7, i1 false)
  ret void
}

; t6: copy the 14-byte constant @.str6 into the global buffer @spool.splbuf.
; The memcpy call passes no explicit alignment on either pointer and is not
; marked `tail`.
; Expected lowering:
;   - without +unaligned-scalar-mem, both targets make an ordinary call to the
;     memcpy library routine with length 14, spilling and reloading ra around it;
;   - riscv32 with +unaligned-scalar-mem stores the halfword value 88 at
;     spool.splbuf+12 and three materialized word constants at +8, +4 and +0;
;   - riscv64 with +unaligned-scalar-mem loads one doubleword from the string
;     and stores it at +0, stores a materialized word at +8 and the halfword
;     value 88 at +12.
define void @t6() nounwind {
; RV32ALIGNED-LABEL: t6:
; RV32ALIGNED:       # %bb.0: # %entry
; RV32ALIGNED-NEXT:    addi sp, sp, -16
; RV32ALIGNED-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32ALIGNED-NEXT:    lui a0, %hi(spool.splbuf)
; RV32ALIGNED-NEXT:    addi a0, a0, %lo(spool.splbuf)
; RV32ALIGNED-NEXT:    lui a1, %hi(.L.str6)
; RV32ALIGNED-NEXT:    addi a1, a1, %lo(.L.str6)
; RV32ALIGNED-NEXT:    li a2, 14
; RV32ALIGNED-NEXT:    call memcpy@plt
; RV32ALIGNED-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32ALIGNED-NEXT:    addi sp, sp, 16
; RV32ALIGNED-NEXT:    ret
;
; RV64ALIGNED-LABEL: t6:
; RV64ALIGNED:       # %bb.0: # %entry
; RV64ALIGNED-NEXT:    addi sp, sp, -16
; RV64ALIGNED-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64ALIGNED-NEXT:    lui a0, %hi(spool.splbuf)
; RV64ALIGNED-NEXT:    addi a0, a0, %lo(spool.splbuf)
; RV64ALIGNED-NEXT:    lui a1, %hi(.L.str6)
; RV64ALIGNED-NEXT:    addi a1, a1, %lo(.L.str6)
; RV64ALIGNED-NEXT:    li a2, 14
; RV64ALIGNED-NEXT:    call memcpy@plt
; RV64ALIGNED-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64ALIGNED-NEXT:    addi sp, sp, 16
; RV64ALIGNED-NEXT:    ret
;
; RV32UNALIGNED-LABEL: t6:
; RV32UNALIGNED:       # %bb.0: # %entry
; RV32UNALIGNED-NEXT:    lui a0, %hi(spool.splbuf)
; RV32UNALIGNED-NEXT:    li a1, 88
; RV32UNALIGNED-NEXT:    sh a1, %lo(spool.splbuf+12)(a0)
; RV32UNALIGNED-NEXT:    lui a1, 361862
; RV32UNALIGNED-NEXT:    addi a1, a1, -1960
; RV32UNALIGNED-NEXT:    sw a1, %lo(spool.splbuf+8)(a0)
; RV32UNALIGNED-NEXT:    lui a1, 362199
; RV32UNALIGNED-NEXT:    addi a1, a1, 559
; RV32UNALIGNED-NEXT:    sw a1, %lo(spool.splbuf+4)(a0)
; RV32UNALIGNED-NEXT:    lui a1, 460503
; RV32UNALIGNED-NEXT:    addi a1, a1, 1071
; RV32UNALIGNED-NEXT:    sw a1, %lo(spool.splbuf)(a0)
; RV32UNALIGNED-NEXT:    ret
;
; RV64UNALIGNED-LABEL: t6:
; RV64UNALIGNED:       # %bb.0: # %entry
; RV64UNALIGNED-NEXT:    lui a0, %hi(.L.str6)
; RV64UNALIGNED-NEXT:    ld a0, %lo(.L.str6)(a0)
; RV64UNALIGNED-NEXT:    lui a1, %hi(spool.splbuf)
; RV64UNALIGNED-NEXT:    li a2, 88
; RV64UNALIGNED-NEXT:    sh a2, %lo(spool.splbuf+12)(a1)
; RV64UNALIGNED-NEXT:    sd a0, %lo(spool.splbuf)(a1)
; RV64UNALIGNED-NEXT:    lui a0, 361862
; RV64UNALIGNED-NEXT:    addiw a0, a0, -1960
; RV64UNALIGNED-NEXT:    sw a0, %lo(spool.splbuf+8)(a1)
; RV64UNALIGNED-NEXT:    ret
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr @spool.splbuf, ptr @.str6, i64 14, i1 false)
  ret void
}

; Aggregate of four i32 fields. It is not referenced by name in the IR visible
; in this file.
; NOTE(review): presumably kept from a typed-pointer version of @t7 - confirm
; before removing.
%struct.Foo = type { i32, i32, i32, i32 }

; t7: copy 16 bytes from %b to %a through the i32-length memcpy intrinsic, with
; both pointers marked align 4.
; Every configuration expands the copy inline with no call to memcpy:
;   - riscv32 (both configurations) and riscv64 without +unaligned-scalar-mem
;     use four word load/store pairs at offsets 12, 8, 4 and 0;
;   - riscv64 with +unaligned-scalar-mem uses two doubleword load/store pairs
;     at offsets 8 and 0.
define void @t7(ptr nocapture %a, ptr nocapture %b) nounwind {
; RV32-LABEL: t7:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lw a2, 12(a1)
; RV32-NEXT:    sw a2, 12(a0)
; RV32-NEXT:    lw a2, 8(a1)
; RV32-NEXT:    sw a2, 8(a0)
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64ALIGNED-LABEL: t7:
; RV64ALIGNED:       # %bb.0: # %entry
; RV64ALIGNED-NEXT:    lw a2, 12(a1)
; RV64ALIGNED-NEXT:    sw a2, 12(a0)
; RV64ALIGNED-NEXT:    lw a2, 8(a1)
; RV64ALIGNED-NEXT:    sw a2, 8(a0)
; RV64ALIGNED-NEXT:    lw a2, 4(a1)
; RV64ALIGNED-NEXT:    sw a2, 4(a0)
; RV64ALIGNED-NEXT:    lw a1, 0(a1)
; RV64ALIGNED-NEXT:    sw a1, 0(a0)
; RV64ALIGNED-NEXT:    ret
;
; RV64UNALIGNED-LABEL: t7:
; RV64UNALIGNED:       # %bb.0: # %entry
; RV64UNALIGNED-NEXT:    ld a2, 8(a1)
; RV64UNALIGNED-NEXT:    sd a2, 8(a0)
; RV64UNALIGNED-NEXT:    ld a1, 0(a1)
; RV64UNALIGNED-NEXT:    sd a1, 0(a0)
; RV64UNALIGNED-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i32(ptr align 4 %a, ptr align 4 %b, i32 16, i1 false)
  ret void
}

; Declarations of the two memcpy intrinsic overloads called above: the
; i32-length form (used by @t0 and @t7) and the i64-length form (used by @t1
; through @t6). Operands are destination, source, byte count and an i1 flag
; that every call in this file passes as false.
declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
